package com.zc.bigdata.mapreduce;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.File;

/**
 * Driver class for the word-count MapReduce job (project: hadoop-dfs).
 *
 * <p>Usage: {@code WordCountApp [inputDir [outputDir]]}. An omitted argument
 * falls back to {@code input/wc} and {@code output/wc} respectively, both
 * resolved by Hadoop against the file system of the loaded configuration.
 *
 * <p>Created 2020-05-06 14:28.
 *
 * @author zengchen
 */
public class WordCountApp {

    /** Hadoop installation used as a fallback on Windows development machines. */
    private static final String DEFAULT_WINDOWS_HADOOP_HOME = "D://gitee//hadoop-2.6.0";

    /** Input directory used when no first command-line argument is given. */
    private static final String DEFAULT_INPUT_DIR = "input/wc";

    /** Output directory used when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT_DIR = "output/wc";

    /**
     * Configures the word-count job, runs it, and waits for it to finish.
     *
     * <p>The output directory is deleted recursively before the job starts.
     * If the job reports failure, the JVM exits with status {@code 1}; on
     * success the method returns normally.
     *
     * @param args optional {@code [inputDir [outputDir]]}
     * @throws Exception if the job cannot be configured or submitted, or the
     *                   output directory cannot be removed
     */
    public static void main(String[] args) throws Exception {
        applyWindowsHadoopHomeDefault();

        // No fs.defaultFS override is set here; the defaults loaded by Configuration apply.
        Configuration configuration = new Configuration();

        Path inputPath = new Path(argOrDefault(args, 0, DEFAULT_INPUT_DIR));
        Path outputPath = new Path(argOrDefault(args, 1, DEFAULT_OUTPUT_DIR));

        // Create the job.
        Job job = Job.getInstance(configuration);

        // Driver class, used to locate the containing jar.
        job.setJarByClass(WordCountApp.class);

        // Custom mapper and reducer.
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        // Map-side aggregation: the reducer class doubles as the combiner.
        // NOTE(review): presumes WordCountReducer's input and output types match
        // and that it is safe to apply repeatedly - confirm against that class.
        job.setCombinerClass(WordCountReducer.class);

        // Key/value types emitted by the mapper and by the reducer.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Input and output directories.
        FileInputFormat.setInputPaths(job, inputPath);
        // The output directory must not exist, otherwise MapReduce reports an error.
        // Delete it through the path's own file system so the directory removed is
        // the same one the job will write to, whichever file system is configured.
        outputPath.getFileSystem(configuration).delete(outputPath, true);
        FileOutputFormat.setOutputPath(job, outputPath);

        // Run the job and block until it completes.
        boolean result = job.waitForCompletion(true);
        System.out.println("处理结果：" + result);

        // Surface a failed job to the calling shell/scheduler.
        if (!result) {
            System.exit(1);
        }
    }

    /**
     * Sets {@code hadoop.home.dir} to the development default, but only on
     * Windows and only when the property has not been supplied already
     * (for example via {@code -Dhadoop.home.dir=...}).
     */
    private static void applyWindowsHadoopHomeDefault() {
        boolean onWindows = System.getProperty("os.name", "").startsWith("Windows");
        if (onWindows && System.getProperty("hadoop.home.dir") == null) {
            System.setProperty("hadoop.home.dir", DEFAULT_WINDOWS_HADOOP_HOME);
        }
    }

    /**
     * Returns {@code args[index]} when present and non-empty, otherwise {@code fallback}.
     */
    private static String argOrDefault(String[] args, int index, String fallback) {
        if (args == null || index >= args.length) {
            return fallback;
        }
        String value = args[index];
        return (value == null || value.isEmpty()) ? fallback : value;
    }
}
